Import intestine expression data
GFPplus_samples_rlog_counts <- read_table(file = "../../03_emb_L1_L3_intestine_RNAseq/03_output/rlog_counts/GFPplus_samples_rlog_counts.tsv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## WBGeneID = col_character(),
## embryo_GFPplus_rep1 = col_double(),
## embryo_GFPplus_rep2 = col_double(),
## embryo_GFPplus_rep3 = col_double(),
## L1_GFPplus_rep1 = col_double(),
## L1_GFPplus_rep3 = col_double(),
## L3_GFPplus_rep1 = col_double(),
## L3_GFPplus_rep2 = col_double(),
## L3_GFPplus_rep3 = col_double()
## )
Import intestine vs non-intestine expression data
res_embryoGFPplus_vs_embryoGFPminus_df <- read_csv(file = "../../03_emb_L1_L3_intestine_RNAseq/03_output/pairwise_DE_results/res_embryoGFPplus_vs_embryoGFPminus.csv", col_names = TRUE)
## Rows: 16762 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): WBGeneID
## dbl (6): baseMean, log2FoldChange, lfcSE, stat, pvalue, padj
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
res_L1GFPplus_vs_L1GFPminus_df <- read_csv(file = "../../03_emb_L1_L3_intestine_RNAseq/03_output/pairwise_DE_results/res_L1GFPplus_vs_L1GFPminus.csv", col_names = TRUE)
## Rows: 16762 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): WBGeneID
## dbl (6): baseMean, log2FoldChange, lfcSE, stat, pvalue, padj
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
res_L3GFPplus_vs_L3GFPminus_df <- read_csv(file = "../../03_emb_L1_L3_intestine_RNAseq/03_output/pairwise_DE_results/res_L3GFPplus_vs_L3GFPminus.csv", col_names = TRUE)
## Rows: 16762 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): WBGeneID
## dbl (6): baseMean, log2FoldChange, lfcSE, stat, pvalue, padj
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Import intestine categories
embryo_intestine_gene_categories <- read_csv(file = "../../03_emb_L1_L3_intestine_RNAseq/03_output/intestine_gene_categories/embryo_intestine_gene_categories.csv")
## Rows: 3250 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): WBGeneID, altHyp, intestine_expression
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
L1_intestine_gene_categories <- read_csv(file = "../../03_emb_L1_L3_intestine_RNAseq/03_output/intestine_gene_categories/L1_intestine_gene_categories.csv")
## Rows: 3400 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): WBGeneID, altHyp, intestine_expression
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
L3_intestine_gene_categories <- read_csv(file = "../../03_emb_L1_L3_intestine_RNAseq/03_output/intestine_gene_categories/L3_intestine_gene_categories.csv")
## Rows: 2646 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): WBGeneID, altHyp, intestine_expression
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Import promoter ChIP-seq data
L1.promoters.hilo <- read.table(file = "../../../David/01_promoters/03_output/L1.promoters.hilo.tsv") %>% rownames_to_column(var = "WBGeneID") %>% select(WBGeneID:IDR_nlogq)
L3.promoters.hilo <- read.table(file = "../../../David/01_promoters/03_output/L3.promoters.hilo.tsv") %>% rownames_to_column(var = "WBGeneID") %>% select(WBGeneID:IDR_nlogq)
LE.promoters.hilo <- read.table(file = "../../../David/01_promoters/03_output/LE.promoters.hilo.tsv") %>% rownames_to_column(var = "WBGeneID") %>% select(WBGeneID:IDR_nlogq)
elt2chip_embryo_intestine_df <- LE.promoters.hilo %>%
full_join(res_embryoGFPplus_vs_embryoGFPminus_df, by = "WBGeneID") %>%
left_join(embryo_intestine_gene_categories, by = "WBGeneID") %>%
left_join(GFPplus_samples_rlog_counts %>% select(WBGeneID, contains("embryo")), by = "WBGeneID") %>%
drop_na(baseMean) %>%
rowwise() %>%
mutate(mean_rlog = mean(embryo_GFPplus_rep1, embryo_GFPplus_rep2, embryo_GFPplus_rep3))
elt2chip_L1_intestine_df <- L1.promoters.hilo %>%
full_join(res_L1GFPplus_vs_L1GFPminus_df, by = "WBGeneID") %>%
left_join(L1_intestine_gene_categories, by = "WBGeneID") %>%
left_join(GFPplus_samples_rlog_counts %>% select(WBGeneID, contains("L1")), by = "WBGeneID") %>%
drop_na(baseMean) %>%
rowwise() %>%
mutate(mean_rlog = mean(L1_GFPplus_rep1, L1_GFPplus_rep3)) %>%
filter(log2FoldChange > -40)
elt2chip_L3_intestine_df <- L3.promoters.hilo %>%
full_join(res_L3GFPplus_vs_L3GFPminus_df, by = "WBGeneID") %>%
left_join(L3_intestine_gene_categories, by = "WBGeneID") %>%
left_join(GFPplus_samples_rlog_counts %>% select(WBGeneID, contains("L3")), by = "WBGeneID") %>%
drop_na(baseMean) %>%
rowwise() %>%
mutate(mean_rlog = mean(L3_GFPplus_rep1, L3_GFPplus_rep2, L3_GFPplus_rep3))
Hypothesis: Promoter localized ELT-2 ChIP-seq signal is positively correlated with intestine gene expression Setup: Use pearson correlation coefficient to measure correlation. Measure for all genes and genes that are intestine enriched.
regression_IDRpeak_rlog_intestine_type <- function(in_df){
in_df_na_filt <- in_df %>% drop_na(IDR_mean, intestine_expression) %>% filter(intestine_expression != "depleted")
in_df_na_filt %>%
ggplot(aes(x = log10(IDR_mean), y = mean_rlog)) +
geom_point(shape = 19, size = 0.5, alpha = 0.25, color = "black") +
geom_smooth(method = "lm", formula = y~x) +
geom_text(hjust = 0, x = min(log10(in_df_na_filt$IDR_mean)), y = max(in_df_na_filt$mean_rlog), aes(label = gene_total), data = in_df_na_filt %>% group_by(intestine_expression) %>% summarise(gene_total = paste0("# genes = ",n(), sep = ""))) +
ggpubr::stat_cor(method = "pearson", label.y = 0.9*max(in_df_na_filt$mean_rlog)) +
ggpubr::stat_regline_equation(label.y = 0.8*max(in_df_na_filt$mean_rlog)) +
facet_grid(.~intestine_expression) +
theme_classic() +
ggtitle(paste("data: ", deparse(substitute(in_df)), sep = ""))
}
regression_IDRpeak_rlog_all_genes <- function(in_df){
in_df_na_filt <- in_df %>% drop_na(IDR_mean)
in_df_na_filt%>%
ggplot(aes(x = log10(IDR_mean), y = mean_rlog)) +
geom_point(shape = 19, size = 0.5, alpha = 0.25, color = "black") +
geom_smooth(method = "lm", formula = y~x) +
annotate("text",x = min(log10(in_df_na_filt$IDR_mean)), y = max(in_df_na_filt$mean_rlog), label = paste0("# genes = ",nrow(in_df_na_filt), sep = ""), hjust = 0) +
ggpubr::stat_cor(method = "pearson", label.y = 0.9*max(in_df_na_filt$mean_rlog)) +
ggpubr::stat_regline_equation(label.y = 0.8*max(in_df_na_filt$mean_rlog)) +
theme_classic() +
ggtitle(paste("correlation with all genes\ndata: ", deparse(substitute(in_df)), sep = ""))
}
regression_promSignal_rlog_intestine_type <- function(in_df){
in_df_na_filt <- in_df %>% drop_na(intestine_expression, log_chip_signal_mean) %>% filter(intestine_expression != "depleted")
in_df_na_filt %>%
ggplot(aes(x = log_chip_signal_mean, y= mean_rlog)) +
geom_point(shape = 19, size = 0.25, alpha = 0.25, color = "black") +
geom_smooth(method = "lm", formula = y~x) +
ggpubr::stat_cor(method = "pearson", label.y = 0.9*max(in_df_na_filt$mean_rlog)) +
ggpubr::stat_regline_equation(label.y = 0.8*max(in_df_na_filt$mean_rlog)) +
geom_text(x = min(in_df_na_filt$log_chip_signal_mean), y = max(in_df_na_filt$mean_rlog), aes(label = gene_total), data = in_df_na_filt %>% group_by(intestine_expression) %>% summarise(gene_total = paste0("# genes = ",n(), sep = "")), hjust = 0) +
facet_grid(.~intestine_expression) +
theme_classic() +
ggtitle(paste("data: ", deparse(substitute(in_df)), sep = ""))
}
regression_promSignal_rlog_all_genes <- function(in_df){
in_df_na_filt<- in_df %>% drop_na(log_chip_signal_mean)
in_df_na_filt %>%
ggplot(aes(x = log_chip_signal_mean, y= mean_rlog)) +
geom_point(shape = 19, size = 0.25, alpha = 0.25, color = "black") +
geom_smooth(method = "lm", formula = y~x) +
ggpubr::stat_cor(method = "pearson", label.y = 0.9*max(in_df_na_filt$mean_rlog)) +
ggpubr::stat_regline_equation(label.y = 0.8*max(in_df_na_filt$mean_rlog)) +
annotate("text",x = min(in_df_na_filt$log_chip_signal_mean), y = max(in_df_na_filt$mean_rlog), label = paste0("# genes = ",nrow(in_df_na_filt), sep = ""), hjust = 0) +
theme_classic() +
ggtitle(paste("data: ", deparse(substitute(in_df)), sep = "")) +
coord_cartesian(xlim = c(min(in_df_na_filt$log_chip_signal_mean), max(in_df_na_filt$log_chip_signal_mean)),
ylim = c(min(in_df_na_filt$mean_rlog), max(in_df_na_filt$mean_rlog))
)
}
regression_IDRpeak_rlog_intestine_type(elt2chip_embryo_intestine_df)
# ggsave(filename = "../03_output/PearsonCorrelation_embryoGFPplus_vs_ELT2_ChIP_Seq_Bound.pdf", width = 6, height = 3, dpi = 300)
regression_IDRpeak_rlog_all_genes(elt2chip_embryo_intestine_df)
# ggsave(filename = "../03_output/PearsonCorrelation_embryoGFPplus_vs_ELT2_ChIP_Seq_Bound.pdf", width = 6, height = 3, dpi = 300)
regression_promSignal_rlog_intestine_type(elt2chip_embryo_intestine_df)
# ggsave(filename = "../03_output/PearsonCorrelation_embryoGFPplus_vs_ELT2_ChIP_Seq_All_Promoters.pdf", width = 6, height = 3, dpi = 300)
regression_promSignal_rlog_all_genes(elt2chip_embryo_intestine_df)
# ggsave(filename = "../03_output/PearsonCorrelation_embryoGFPplus_vs_ELT2_ChIP_Seq_Bound.pdf", width = 6, height = 3, dpi = 300)
regression_promSignal_rlog_all_genes(elt2chip_L1_intestine_df)
regression_promSignal_rlog_intestine_type(elt2chip_L1_intestine_df)
regression_IDRpeak_rlog_all_genes(elt2chip_L1_intestine_df)
regression_IDRpeak_rlog_intestine_type(elt2chip_L1_intestine_df)
regression_promSignal_rlog_all_genes(elt2chip_L3_intestine_df)
regression_promSignal_rlog_intestine_type(elt2chip_L3_intestine_df)
regression_IDRpeak_rlog_all_genes(elt2chip_L3_intestine_df)
regression_IDRpeak_rlog_intestine_type(elt2chip_L3_intestine_df)
elt2chip_embryo_intestine_df %>%
ggplot(aes(x = mean_rlog)) +
geom_histogram() +
# geom_point(shape = 19, size = 0.5, alpha = 0.1) +
# geom_smooth(method = "lm", formula = y~x) +
# stat_summary(fun.data = "mean_cl_boot") +
# ggpubr::stat_cor(method = "pearson") +
facet_grid(~intestine_expression)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
elt2chip_embryo_intestine_df %>%
ggplot(aes(x = log_chip_signal_mean)) +
geom_histogram() +
# geom_point(shape = 19, size = 0.5, alpha = 0.1) +
# geom_smooth(method = "lm", formula = y~x) +
# stat_summary(fun.data = "mean_cl_boot") +
# ggpubr::stat_cor(method = "pearson") +
facet_grid(~intestine_expression)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1135 rows containing non-finite values (stat_bin).
regression_IDRpeak_LFC_intestine_type <- function(in_df){
in_df_na_filt <- in_df %>% drop_na(IDR_mean, intestine_expression) %>% filter(intestine_expression != "depleted")
in_df_na_filt %>%
ggplot(aes(x = log10(IDR_mean), y = log2FoldChange)) +
geom_point(shape = 19, size = 0.5, alpha = 0.25, color = "black") +
geom_smooth(method = "lm", formula = y~x) +
geom_text(hjust = 0, x = min(log10(in_df_na_filt$IDR_mean)), y = max(in_df_na_filt$log2FoldChange), aes(label = gene_total), data = in_df_na_filt %>% group_by(intestine_expression) %>% summarise(gene_total = paste0("# genes = ",n(), sep = ""))) +
ggpubr::stat_cor(method = "pearson", label.y = 0.9*max(in_df_na_filt$log2FoldChange)) +
ggpubr::stat_regline_equation(label.y = 0.8*max(in_df_na_filt$log2FoldChange)) +
facet_grid(.~intestine_expression) +
theme_classic() +
ggtitle(paste("data: ", deparse(substitute(in_df)), sep = ""))
}
regression_IDRpeak_LFC_all_genes <- function(in_df){
in_df_na_filt <- in_df %>% drop_na(IDR_mean)
in_df_na_filt%>%
ggplot(aes(x = log10(IDR_mean), y = log2FoldChange)) +
geom_point(shape = 19, size = 0.5, alpha = 0.25, color = "black") +
geom_smooth(method = "lm", formula = y~x) +
annotate("text",x = min(log10(in_df_na_filt$IDR_mean)), y = max(in_df_na_filt$log2FoldChange), label = paste0("# genes = ",nrow(in_df_na_filt), sep = ""), hjust = 0) +
ggpubr::stat_cor(method = "pearson", label.y = 0.9*max(in_df_na_filt$log2FoldChange)) +
ggpubr::stat_regline_equation(label.y = 0.8*max(in_df_na_filt$log2FoldChange)) +
theme_classic() +
ggtitle(paste("correlation with all genes\ndata: ", deparse(substitute(in_df)), sep = ""))
}
regression_promSignal_LFC_intestine_type <- function(in_df){
in_df_na_filt <- in_df %>% drop_na(intestine_expression, log_chip_signal_mean) %>% filter(intestine_expression != "depleted")
in_df_na_filt %>%
ggplot(aes(x = log_chip_signal_mean, y= log2FoldChange)) +
geom_point(shape = 19, size = 0.25, alpha = 0.25, color = "black") +
geom_smooth(method = "lm", formula = y~x) +
ggpubr::stat_cor(method = "pearson", label.y = 0.9*max(in_df_na_filt$log2FoldChange)) +
ggpubr::stat_regline_equation(label.y = 0.8*max(in_df_na_filt$log2FoldChange)) +
geom_text(x = min(in_df_na_filt$log_chip_signal_mean), y = max(in_df_na_filt$log2FoldChange), aes(label = gene_total), data = in_df_na_filt %>% group_by(intestine_expression) %>% summarise(gene_total = paste0("# genes = ",n(), sep = "")), hjust = 0) +
facet_grid(.~intestine_expression) +
theme_classic() +
ggtitle(paste("data: ", deparse(substitute(in_df)), sep = ""))
}
regression_promSignal_LFC_all_genes <- function(in_df){
in_df_na_filt<- in_df %>% drop_na(log_chip_signal_mean)
in_df_na_filt %>%
ggplot(aes(x = log_chip_signal_mean, y= log2FoldChange)) +
geom_point(shape = 19, size = 0.25, alpha = 0.25, color = "black") +
geom_smooth(method = "lm", formula = y~x) +
ggpubr::stat_cor(method = "pearson", label.y = 0.9*max(in_df_na_filt$log2FoldChange)) +
ggpubr::stat_regline_equation(label.y = 0.8*max(in_df_na_filt$log2FoldChange)) +
annotate("text",x = min(in_df_na_filt$log_chip_signal_mean), y = max(in_df_na_filt$log2FoldChange), label = paste0("# genes = ",nrow(in_df_na_filt), sep = ""), hjust = 0) +
theme_classic() +
ggtitle(paste("correlation with all genes\ndata: ", deparse(substitute(in_df)), sep = "")) +
coord_cartesian(xlim = c(min(in_df_na_filt$log_chip_signal_mean), max(in_df_na_filt$log_chip_signal_mean)),
ylim = c(min(in_df_na_filt$log2FoldChange), max(in_df_na_filt$log2FoldChange))
)
}
regression_IDRpeak_LFC_all_genes(elt2chip_embryo_intestine_df)
regression_IDRpeak_LFC_intestine_type(elt2chip_embryo_intestine_df)
regression_promSignal_LFC_all_genes(elt2chip_embryo_intestine_df)
regression_promSignal_LFC_intestine_type(elt2chip_embryo_intestine_df)
regression_IDRpeak_LFC_all_genes(elt2chip_L1_intestine_df)
regression_IDRpeak_LFC_intestine_type(elt2chip_L1_intestine_df)
regression_promSignal_LFC_all_genes(elt2chip_L1_intestine_df)
regression_promSignal_LFC_intestine_type(elt2chip_L1_intestine_df)
regression_IDRpeak_LFC_all_genes(elt2chip_L3_intestine_df)
regression_IDRpeak_LFC_intestine_type(elt2chip_L3_intestine_df)
regression_promSignal_LFC_all_genes(elt2chip_L3_intestine_df)
regression_promSignal_LFC_intestine_type(elt2chip_L3_intestine_df)